import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import openpyxl
import sys
import seaborn as sns
import plotly.express as px # graphing interactive map from data
from plotly import tools
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot, plot
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
sys.setrecursionlimit(10000000)
# Render our plots inline
%matplotlib inline
# Make the graphs a bit prettier, and bigger
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 7)
GTDB_USA_all = pd.read_csv('GTDB_USA.csv', header=0)
GTDB_USA_all
| eventid | iyear | imonth | iday | approxdate | extended | resolution | country | country_txt | region | ... | addnotes | scite1 | scite2 | scite3 | dbsource | INT_LOG | INT_IDEO | INT_MISC | INT_ANY | related | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 197001010002 | 1970 | 1 | 1 | NaN | 0 | NaN | 217 | United States | 1 | ... | The Cairo Chief of Police, William Petersen, r... | "Police Chief Quits," Washington Post, January... | "Cairo Police Chief Quits; Decries Local 'Mili... | Christopher Hewitt, "Political Violence and Te... | Hewitt Project | -9 | -9 | 0 | -9 | NaN |
| 1 | 197001020002 | 1970 | 1 | 2 | NaN | 0 | NaN | 217 | United States | 1 | ... | Damages were estimated to be between $20,000-$... | Committee on Government Operations United Stat... | Christopher Hewitt, "Political Violence and Te... | NaN | Hewitt Project | -9 | -9 | 0 | -9 | NaN |
| 2 | 197001020003 | 1970 | 1 | 2 | NaN | 0 | NaN | 217 | United States | 1 | ... | The New Years Gang issue a communiqué to a loc... | Tom Bates, "Rads: The 1970 Bombing of the Army... | David Newman, Sandra Sutherland, and Jon Stewa... | The Wisconsin Cartographers' Guild, "Wisconsin... | Hewitt Project | 0 | 0 | 0 | 0 | NaN |
| 3 | 197001030001 | 1970 | 1 | 3 | NaN | 0 | NaN | 217 | United States | 1 | ... | Karl Armstrong's girlfriend, Lynn Schultz, dro... | Committee on Government Operations United Stat... | Tom Bates, "Rads: The 1970 Bombing of the Army... | David Newman, Sandra Sutherland, and Jon Stewa... | Hewitt Project | 0 | 0 | 0 | 0 | NaN |
| 4 | 197001050001 | 1970 | 1 | 1 | NaN | 0 | NaN | 217 | United States | 1 | ... | NaN | NaN | NaN | NaN | PGIS | 0 | 0 | 0 | 0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3116 | 202012130030 | 2020 | 12 | 13 | NaN | 0 | NaN | 217 | United States | 1 | ... | The assailant targeted this church three separ... | "Black leaders air fears following church fire... | "Suspicious fire at Black church being investi... | NaN | START Primary Collection | -9 | -9 | 0 | -9 | NaN |
| 3117 | 202012130044 | 2020 | 12 | 13 | NaN | 0 | NaN | 217 | United States | 1 | ... | NaN | "A gunman is dead after a shooting at a New Yo... | "Church-shoot guy riled over Latin America," D... | "NYC cathedral gunman's note says he planned t... | START Primary Collection | -9 | -9 | 0 | -9 | NaN |
| 3118 | 202012150028 | 2020 | 12 | 15 | NaN | 0 | NaN | 217 | United States | 1 | ... | The assailant targeted this church three separ... | "Black leaders air fears following church fire... | "Suspicious fire at Black church being investi... | NaN | START Primary Collection | -9 | -9 | 0 | -9 | NaN |
| 3119 | 202012250003 | 2020 | 12 | 25 | NaN | 0 | NaN | 217 | United States | 1 | ... | There is doubt that this incident meets terror... | "Bomber to neighbor: The world is 'never going... | "FBI: Nashville bomber driven by conspiracies,... | "Behind the Nashville Bombing, a Conspiracy Th... | START Primary Collection | -9 | -9 | 0 | -9 | NaN |
| 3120 | 202012280022 | 2020 | 12 | 28 | NaN | 0 | NaN | 217 | United States | 1 | ... | The assailant targeted this church three separ... | "Black leaders air fears following church fire... | "Suspicious fire at Black church being investi... | NaN | START Primary Collection | -9 | -9 | 0 | -9 | NaN |
3121 rows × 135 columns
GTDB_USA = GTDB_USA_all[['iyear', 'imonth', 'iday', 'provstate','city', 'crit1', 'crit2', 'crit3', 'suicide',
'attacktype1', 'attacktype1_txt', 'targtype1', 'targsubtype1', 'targtype1_txt', 'gname', 'nkill', 'nwound',
'weaptype1', 'weaptype1_txt', 'success']]
GTDB_USA
| iyear | imonth | iday | provstate | city | crit1 | crit2 | crit3 | suicide | attacktype1 | attacktype1_txt | targtype1 | targsubtype1 | targtype1_txt | gname | nkill | nwound | weaptype1 | weaptype1_txt | success | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1970 | 1 | 1 | Illinois | Cairo | 1 | 1 | 1 | 0 | 2 | Armed Assault | 3 | 22.0 | Police | Black Nationalists | 0.0 | 0.0 | 5 | Firearms | 1 |
| 1 | 1970 | 1 | 2 | California | Oakland | 1 | 1 | 1 | 0 | 3 | Bombing/Explosion | 21 | 107.0 | Utilities | Unknown | 0.0 | 0.0 | 6 | Explosives | 1 |
| 2 | 1970 | 1 | 2 | Wisconsin | Madison | 1 | 1 | 1 | 0 | 7 | Facility/Infrastructure Attack | 4 | 28.0 | Military | New Year's Gang | 0.0 | 0.0 | 8 | Incendiary | 1 |
| 3 | 1970 | 1 | 3 | Wisconsin | Madison | 1 | 1 | 1 | 0 | 7 | Facility/Infrastructure Attack | 2 | 21.0 | Government (General) | New Year's Gang | 0.0 | 0.0 | 8 | Incendiary | 1 |
| 4 | 1970 | 1 | 1 | Wisconsin | Baraboo | 1 | 1 | 0 | 0 | 3 | Bombing/Explosion | 4 | 27.0 | Military | Weather Underground, Weathermen | 0.0 | 0.0 | 6 | Explosives | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3116 | 2020 | 12 | 13 | Massachusetts | Springfield | 1 | 1 | 1 | 0 | 7 | Facility/Infrastructure Attack | 15 | 86.0 | Religious Figures/Institutions | White supremacists/nationalists | 0.0 | 0.0 | 8 | Incendiary | 1 |
| 3117 | 2020 | 12 | 13 | New York | New York City | 1 | 1 | 1 | 1 | 6 | Hostage Taking (Kidnapping) | 15 | 86.0 | Religious Figures/Institutions | Anti-globalization extremists | 1.0 | 0.0 | 5 | Firearms | 0 |
| 3118 | 2020 | 12 | 15 | Massachusetts | Springfield | 1 | 1 | 1 | 0 | 7 | Facility/Infrastructure Attack | 15 | 86.0 | Religious Figures/Institutions | White supremacists/nationalists | 0.0 | 0.0 | 8 | Incendiary | 1 |
| 3119 | 2020 | 12 | 25 | Tennessee | Nashville | 0 | 1 | 1 | 1 | 3 | Bombing/Explosion | 1 | 7.0 | Business | Conspiracy theory extremists | 1.0 | 3.0 | 6 | Explosives | 1 |
| 3120 | 2020 | 12 | 28 | Massachusetts | Springfield | 1 | 1 | 1 | 0 | 7 | Facility/Infrastructure Attack | 15 | 86.0 | Religious Figures/Institutions | White supremacists/nationalists | 0.0 | 0.0 | 8 | Incendiary | 1 |
3121 rows × 20 columns
GTDB_USA.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3121 entries, 0 to 3120 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 iyear 3121 non-null int64 1 imonth 3121 non-null int64 2 iday 3121 non-null int64 3 provstate 3121 non-null object 4 city 3121 non-null object 5 crit1 3121 non-null int64 6 crit2 3121 non-null int64 7 crit3 3121 non-null int64 8 suicide 3121 non-null int64 9 attacktype1 3121 non-null int64 10 attacktype1_txt 3121 non-null object 11 targtype1 3121 non-null int64 12 targsubtype1 2998 non-null float64 13 targtype1_txt 3121 non-null object 14 gname 3121 non-null object 15 nkill 3051 non-null float64 16 nwound 3031 non-null float64 17 weaptype1 3121 non-null int64 18 weaptype1_txt 3121 non-null object 19 success 3121 non-null int64 dtypes: float64(3), int64(11), object(6) memory usage: 487.8+ KB
GTDB_USA.shape
(3121, 20)
# Rename columns
GTDB_USA.rename(columns={'iyear':'Year','imonth':'Month','iday':'Day', 'provstate':'State','attacktype1':'Attack_Type', 'attacktype1_txt': 'Attack',
'targtype1_txt':'Target','nkill':'Killed', 'nwound':'Wounded','gname':'Group','targtype1':'Target_type',
'weaptype1':'Weapon_type', 'weaptype1_txt':'Weapon'},inplace=True)
GTDB_USA.head()
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/1627560304.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| Year | Month | Day | State | city | crit1 | crit2 | crit3 | suicide | Attack_Type | Attack | Target_type | targsubtype1 | Target | Group | Killed | Wounded | Weapon_type | Weapon | success | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1970 | 1 | 1 | Illinois | Cairo | 1 | 1 | 1 | 0 | 2 | Armed Assault | 3 | 22.0 | Police | Black Nationalists | 0.0 | 0.0 | 5 | Firearms | 1 |
| 1 | 1970 | 1 | 2 | California | Oakland | 1 | 1 | 1 | 0 | 3 | Bombing/Explosion | 21 | 107.0 | Utilities | Unknown | 0.0 | 0.0 | 6 | Explosives | 1 |
| 2 | 1970 | 1 | 2 | Wisconsin | Madison | 1 | 1 | 1 | 0 | 7 | Facility/Infrastructure Attack | 4 | 28.0 | Military | New Year's Gang | 0.0 | 0.0 | 8 | Incendiary | 1 |
| 3 | 1970 | 1 | 3 | Wisconsin | Madison | 1 | 1 | 1 | 0 | 7 | Facility/Infrastructure Attack | 2 | 21.0 | Government (General) | New Year's Gang | 0.0 | 0.0 | 8 | Incendiary | 1 |
| 4 | 1970 | 1 | 1 | Wisconsin | Baraboo | 1 | 1 | 0 | 0 | 3 | Bombing/Explosion | 4 | 27.0 | Military | Weather Underground, Weathermen | 0.0 | 0.0 | 6 | Explosives | 0 |
GTDB_USA['Weapon'].value_counts()
Explosives 1441 Incendiary 968 Firearms 488 Melee 68 Unknown 39 Chemical 28 Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs) 25 Biological 24 Sabotage Equipment 18 Other 17 Fake Weapons 4 Radiological 1 Name: Weapon, dtype: int64
GTDB_USA['Group'].value_counts()
Unknown 606
Anti-Abortion extremists 234
Left-Wing Militants 169
White supremacists/nationalists 131
Fuerzas Armadas de Liberacion Nacional (FALN) 120
...
Lebanese Man 1
Organization Alliance of Cuban Intransigence 1
Mormon Extremist 1
Nuclear Liberation Front 1
Anti-globalization extremists 1
Name: Group, Length: 244, dtype: int64
#GTDB_USA['Recode Group'] = GTDB_USA['Group']
#GTDB_USA['Recode Group'].replace(GTDB_USA['Recode Group'].value_counts()[:15].values, 'Other')
#GTDB_USA.loc[GTDB_USA["Recode Group"] == GTDB_USA['Recode Group'].value_counts()[:15]] = 'Other'
#GTDB_USA['Recode Group'].mask(GTDB_USA['Recode Group'] == GTDB_USA['Recode Group'].value_counts()[:15], 'Other', inplace=True)
#GTDB_USA. head()
#newGroups = GTDB_USA[['Group']]
#newGroups['total'] = newGroups.groupby(['Group']).size()
#newGroups
#top10 = GTDB_USA.nsmallest(10, 'Group')
#GTDB_USA['Recode_Group'] = np.where((GTDB_USA['Group'].eq(top10['Group'])),GTDB_USA['Group'],'Other')
#GTDB_USA['Recode_Group'].value_counts()
#GTDB_USA['success'].value_counts()
#GTDB_USA['Weapon_type'].value_counts()
#GTDB_USA['weapsubtype1'].value_counts()
#GTDB_USA['Target'].value_counts().head(20)
#GTDB_USA['Target_type'].value_counts()
#GTDB_USA['State'].value_counts()
Drop Puerto Rico and US Virgin Islands and unknown locations.
#GTDB_USA = GTDB_USA[GTDB_USA.State != ['Puerto Rico', 'U.S. Virgin Islands']]
GTDB_USA = GTDB_USA[GTDB_USA.State.isin(['Puerto Rico', 'U.S. Virgin Islands', 'Unknown']) == False]
GTDB_USA['State'].value_counts()
California 632 New York 541 Florida 176 Washington 119 Illinois 115 Texas 89 District of Columbia 88 Oregon 72 Massachusetts 68 Ohio 58 Pennsylvania 50 Michigan 50 New Jersey 50 Colorado 48 Missouri 46 Arizona 44 Virginia 43 Wisconsin 43 Maryland 42 Georgia 42 North Carolina 35 Tennessee 34 Louisiana 28 Minnesota 28 Indiana 26 New Mexico 25 Nevada 24 Nebraska 24 Iowa 24 Utah 22 Connecticut 19 Alabama 17 Oklahoma 15 Kansas 14 Idaho 14 Mississippi 14 South Carolina 11 New Hampshire 11 South Dakota 9 Delaware 8 Arkansas 8 Montana 7 Kentucky 7 North Dakota 7 Vermont 5 Hawaii 5 Maine 4 Wyoming 3 Rhode Island 2 West Virginia 2 Alaska 1 Name: State, dtype: int64
GTDB_USA.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2869 entries, 0 to 3120 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Year 2869 non-null int64 1 Month 2869 non-null int64 2 Day 2869 non-null int64 3 State 2869 non-null object 4 city 2869 non-null object 5 crit1 2869 non-null int64 6 crit2 2869 non-null int64 7 crit3 2869 non-null int64 8 suicide 2869 non-null int64 9 Attack_Type 2869 non-null int64 10 Attack 2869 non-null object 11 Target_type 2869 non-null int64 12 targsubtype1 2751 non-null float64 13 Target 2869 non-null object 14 Group 2869 non-null object 15 Killed 2819 non-null float64 16 Wounded 2801 non-null float64 17 Weapon_type 2869 non-null int64 18 Weapon 2869 non-null object 19 success 2869 non-null int64 dtypes: float64(3), int64(11), object(6) memory usage: 470.7+ KB
Check the data for missing values.
GTDB_USA.isna().sum()
Year 0 Month 0 Day 0 State 0 city 0 crit1 0 crit2 0 crit3 0 suicide 0 Attack_Type 0 Attack 0 Target_type 0 targsubtype1 118 Target 0 Group 0 Killed 50 Wounded 68 Weapon_type 0 Weapon 0 success 0 dtype: int64
To get a better look at the missing data, we can calculate the percentage of missing data for each column.
#missing data
total = GTDB_USA.isnull().sum().sort_values(ascending=False)
percent = (GTDB_USA.isnull().sum()/GTDB_USA.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data.head(20)
| Total | Percent | |
|---|---|---|
| targsubtype1 | 118 | 0.041129 |
| Wounded | 68 | 0.023702 |
| Killed | 50 | 0.017428 |
| Year | 0 | 0.000000 |
| Month | 0 | 0.000000 |
| Weapon | 0 | 0.000000 |
| Weapon_type | 0 | 0.000000 |
| Group | 0 | 0.000000 |
| Target | 0 | 0.000000 |
| Target_type | 0 | 0.000000 |
| Attack | 0 | 0.000000 |
| Attack_Type | 0 | 0.000000 |
| suicide | 0 | 0.000000 |
| crit3 | 0 | 0.000000 |
| crit2 | 0 | 0.000000 |
| crit1 | 0 | 0.000000 |
| city | 0 | 0.000000 |
| State | 0 | 0.000000 |
| Day | 0 | 0.000000 |
| success | 0 | 0.000000 |
We will drop all rows with N/A values.
GTDB_USA = GTDB_USA.dropna()
GTDB_USA['Weapon'].value_counts()
Explosives 1133 Incendiary 909 Firearms 445 Melee 66 Chemical 25 Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs) 25 Biological 23 Sabotage Equipment 18 Unknown 17 Other 17 Fake Weapons 4 Radiological 1 Name: Weapon, dtype: int64
GTDB_USA.isna().sum()
Year 0 Month 0 Day 0 State 0 city 0 crit1 0 crit2 0 crit3 0 suicide 0 Attack_Type 0 Attack 0 Target_type 0 targsubtype1 0 Target 0 Group 0 Killed 0 Wounded 0 Weapon_type 0 Weapon 0 success 0 dtype: int64
GTDB_USA.shape
(2683, 20)
GTDB_USA.describe()
| Year | Month | Day | crit1 | crit2 | crit3 | suicide | Attack_Type | Target_type | targsubtype1 | Killed | Wounded | Weapon_type | success | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 | 2683.000000 |
| mean | 1988.062244 | 6.262766 | 15.305255 | 0.981737 | 0.983228 | 0.992546 | 0.007082 | 4.400671 | 6.766306 | 40.430116 | 1.449124 | 9.773761 | 6.658964 | 0.828550 |
| std | 17.415874 | 3.347272 | 9.168060 | 0.133926 | 0.128441 | 0.086032 | 0.083870 | 2.210432 | 5.754176 | 29.868452 | 38.167953 | 298.031224 | 1.591475 | 0.376972 |
| min | 1970.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 |
| 25% | 1972.000000 | 3.000000 | 7.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 3.000000 | 2.000000 | 14.000000 | 0.000000 | 0.000000 | 6.000000 | 1.000000 |
| 50% | 1982.000000 | 6.000000 | 15.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 3.000000 | 5.000000 | 40.000000 | 0.000000 | 0.000000 | 6.000000 | 1.000000 |
| 75% | 2001.000000 | 9.000000 | 23.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 7.000000 | 14.000000 | 68.000000 | 0.000000 | 0.000000 | 8.000000 | 1.000000 |
| max | 2020.000000 | 12.000000 | 31.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 9.000000 | 22.000000 | 110.000000 | 1385.000000 | 10878.000000 | 13.000000 | 1.000000 |
#GTDB_USA['Group'].value_counts().head(20)
mask = np.zeros_like(GTDB_USA.corr())
mask[np.triu_indices_from(mask)] = True
plt.figure(figsize = (26,16))
sns.heatmap(GTDB_USA.corr(), mask=mask, annot=True, cmap="RdYlGn", linewidths=.75)
<AxesSubplot:>
# Number Of Terrorist Activities By Attack Type
#plt.subplots(figsize=(15,5))
sns.countplot(x=GTDB_USA["Attack_Type"], data=GTDB_USA, palette='viridis_r', order = GTDB_USA['Attack_Type'].value_counts().index)
plt.xticks(rotation=45)
plt.ylabel('Total')
plt.xlabel('Attack Type')
plt.xticks([0,1,2,3,4,5,6,7], ['FACILITY / \nINFRASTRUCTURE \nATTACK','BOMBING/\nEXPLOSION','ARMED ASSAULT','ASSASSINATION',
'HOSTAGE TAKING\n(BARRICADE INCIDENT)','UNARMED ASSAULT', 'HOSTAGE TAKING\n(KIDNAPPING)',
'HIJACKING'])
plt.title('Number Of Terrorist Activities By Attack Type')
plt.show()
GTDB_USA['Attack_Type'].value_counts()
3 1110 7 928 2 350 1 118 8 88 5 48 6 20 4 18 9 3 Name: Attack_Type, dtype: int64
# Terrorist Activities By Weapon Type
#plt.subplots(figsize=(15,5))
sns.countplot(x=GTDB_USA["Weapon_type"], data=GTDB_USA, palette='viridis_r', order = GTDB_USA['Weapon_type'].value_counts().index)
plt.xticks(rotation=45)
plt.ylabel('Total')
plt.xlabel('Weapon Type')
plt.xticks([0,1,2,3,4], ['Incendiary','Explosives','Firearms','Melee','Chemical'])
plt.title('Terrorist Activities By Weapon Type')
plt.show()
GTDB_USA['Weapon_type'].value_counts()
6 1133 8 909 5 445 9 66 2 25 10 25 1 23 11 18 13 17 12 17 7 4 3 1 Name: Weapon_type, dtype: int64
#Number Of Terrorist Activities Each Year
years = GTDB_USA["Year"].value_counts().to_frame()
years = years.rename(columns={'Year':'count'})
years['year'] = years.index
years.reset_index(inplace=True)
years = years.drop(columns='index')
years['killed'] = GTDB_USA.groupby(['Year']).Killed.sum()
years['wounded'] = GTDB_USA.groupby(['Year']).Wounded.sum()
years
| count | year | killed | wounded | |
|---|---|---|---|---|
| 0 | 437 | 1970 | NaN | NaN |
| 1 | 223 | 1971 | NaN | NaN |
| 2 | 119 | 1975 | NaN | NaN |
| 3 | 103 | 1977 | NaN | NaN |
| 4 | 103 | 2020 | NaN | NaN |
| 5 | 80 | 1976 | NaN | NaN |
| 6 | 73 | 2018 | NaN | NaN |
| 7 | 72 | 2019 | NaN | NaN |
| 8 | 68 | 2016 | NaN | NaN |
| 9 | 66 | 1974 | NaN | NaN |
| 10 | 63 | 1972 | NaN | NaN |
| 11 | 62 | 2017 | NaN | NaN |
| 12 | 59 | 1995 | NaN | NaN |
| 13 | 54 | 1984 | NaN | NaN |
| 14 | 54 | 1973 | NaN | NaN |
| 15 | 53 | 1999 | NaN | NaN |
| 16 | 52 | 1994 | NaN | NaN |
| 17 | 50 | 1982 | NaN | NaN |
| 18 | 49 | 1978 | NaN | NaN |
| 19 | 48 | 1980 | NaN | NaN |
| 20 | 47 | 1981 | NaN | NaN |
| 21 | 45 | 2001 | NaN | NaN |
| 22 | 43 | 1979 | NaN | NaN |
| 23 | 40 | 2015 | NaN | NaN |
| 24 | 40 | 1997 | NaN | NaN |
| 25 | 39 | 2000 | NaN | NaN |
| 26 | 36 | 1983 | NaN | NaN |
| 27 | 34 | 2003 | NaN | NaN |
| 28 | 34 | 1996 | NaN | NaN |
| 29 | 34 | 1985 | NaN | NaN |
| 30 | 33 | 2002 | NaN | NaN |
| 31 | 31 | 1992 | NaN | NaN |
| 32 | 30 | 1989 | NaN | NaN |
| 33 | 29 | 2014 | NaN | NaN |
| 34 | 27 | 1990 | NaN | NaN |
| 35 | 26 | 1986 | NaN | NaN |
| 36 | 24 | 1998 | NaN | NaN |
| 37 | 23 | 1991 | NaN | NaN |
| 38 | 23 | 1987 | NaN | NaN |
| 39 | 21 | 2005 | NaN | NaN |
| 40 | 20 | 2012 | NaN | NaN |
| 41 | 19 | 2013 | NaN | NaN |
| 42 | 19 | 2010 | NaN | NaN |
| 43 | 17 | 2008 | NaN | NaN |
| 44 | 17 | 1988 | NaN | NaN |
| 45 | 11 | 2009 | NaN | NaN |
| 46 | 11 | 2007 | NaN | NaN |
| 47 | 9 | 2011 | NaN | NaN |
| 48 | 9 | 2004 | NaN | NaN |
| 49 | 4 | 2006 | NaN | NaN |
# Number Of Terrorist Activities Each Year
plt.subplots(figsize=(15,5))
sns.lineplot(data=years, x="year", y='count')
plt.xticks(rotation=90)
plt.title('Number Of Terrorist Activities Each Year')
plt.show()
years2 = pd.DataFrame(GTDB_USA.groupby(['Year']).Killed.sum())
years2['wounded'] = pd.DataFrame(GTDB_USA.groupby(['Year']).Wounded.sum())
years2['count'] = GTDB_USA["Year"].value_counts().to_frame()
#years2['year'] = years2.index
years2.reset_index(inplace=True)
years2['count'] = years2['count'].astype(float)
years2 = years2[years2.Year > 1971]
#years2.drop(years2[years2['Year']] == 1970)
years2
| Year | Killed | wounded | count | |
|---|---|---|---|---|
| 2 | 1972 | 9.0 | 22.0 | 63.0 |
| 3 | 1973 | 45.0 | 33.0 | 54.0 |
| 4 | 1974 | 16.0 | 50.0 | 66.0 |
| 5 | 1975 | 21.0 | 146.0 | 119.0 |
| 6 | 1976 | 4.0 | 41.0 | 80.0 |
| 7 | 1977 | 4.0 | 10.0 | 103.0 |
| 8 | 1978 | 4.0 | 7.0 | 49.0 |
| 9 | 1979 | 14.0 | 36.0 | 43.0 |
| 10 | 1980 | 15.0 | 20.0 | 48.0 |
| 11 | 1981 | 7.0 | 12.0 | 47.0 |
| 12 | 1982 | 7.0 | 31.0 | 50.0 |
| 13 | 1983 | 7.0 | 5.0 | 36.0 |
| 14 | 1984 | 3.0 | 780.0 | 54.0 |
| 15 | 1985 | 3.0 | 12.0 | 34.0 |
| 16 | 1986 | 0.0 | 35.0 | 26.0 |
| 17 | 1987 | 1.0 | 1.0 | 23.0 |
| 18 | 1988 | 0.0 | 0.0 | 17.0 |
| 19 | 1989 | 2.0 | 14.0 | 30.0 |
| 20 | 1990 | 5.0 | 7.0 | 27.0 |
| 21 | 1991 | 26.0 | 31.0 | 23.0 |
| 22 | 1992 | 2.0 | 3.0 | 31.0 |
| 23 | 1994 | 8.0 | 16.0 | 52.0 |
| 24 | 1995 | 178.0 | 738.0 | 59.0 |
| 25 | 1996 | 2.0 | 84.0 | 34.0 |
| 26 | 1997 | 2.0 | 19.0 | 40.0 |
| 27 | 1998 | 4.0 | 2.0 | 24.0 |
| 28 | 1999 | 20.0 | 40.0 | 53.0 |
| 29 | 2000 | 8.0 | 10.0 | 39.0 |
| 30 | 2001 | 3014.0 | 21894.0 | 45.0 |
| 31 | 2002 | 4.0 | 11.0 | 33.0 |
| 32 | 2003 | 0.0 | 0.0 | 34.0 |
| 33 | 2004 | 0.0 | 0.0 | 9.0 |
| 34 | 2005 | 0.0 | 0.0 | 21.0 |
| 35 | 2006 | 0.0 | 9.0 | 4.0 |
| 36 | 2007 | 0.0 | 0.0 | 11.0 |
| 37 | 2008 | 2.0 | 12.0 | 17.0 |
| 38 | 2009 | 21.0 | 50.0 | 11.0 |
| 39 | 2010 | 4.0 | 17.0 | 19.0 |
| 40 | 2011 | 0.0 | 2.0 | 9.0 |
| 41 | 2012 | 7.0 | 7.0 | 20.0 |
| 42 | 2013 | 21.0 | 420.0 | 19.0 |
| 43 | 2014 | 26.0 | 19.0 | 29.0 |
| 44 | 2015 | 54.0 | 59.0 | 40.0 |
| 45 | 2016 | 68.0 | 150.0 | 68.0 |
| 46 | 2017 | 98.0 | 948.0 | 62.0 |
| 47 | 2018 | 42.0 | 61.0 | 73.0 |
| 48 | 2019 | 53.0 | 115.0 | 72.0 |
| 49 | 2020 | 12.0 | 32.0 | 103.0 |
# Number of Terrorist Attacks Per Year
sns.barplot(data=years2, x='Year', y='count',palette=('viridis_r'))
plt.xticks(rotation=45)
#fig=plt.gcf()
#plt.set_size_inches(100,10)
plt.title('Number of Terrorist Attacks Per Year')
plt.show()
# Creating trace1
trace1 = go.Scatter(
x = years2.Year,
y = years2.Killed,
mode = "lines",
name = "Total Killed",
marker = dict(color = 'rgba(16, 112, 2, 0.8)'),
text= years2.Killed)
# Creating trace2
trace2 = go.Scatter(
x = years2.Year,
y = years2.wounded,
mode = "lines",
name = "Total Wounded",
marker = dict(color = 'rgba(80, 26, 80, 0.8)'),
text= years2.wounded)
# Creating trace3
#trace3 = go.Scatter(
#x = years2.Year,
#y = years2.count,
#mode = "lines",
#name = "Total Attacks",
#marker = dict(color = 'rgba(255, 26, 80, 0.8)'),
#text= years2.count)
data = [trace1, trace2]
layout = dict(title = 'Total Casualties',
xaxis= dict(title= 'Year',ticklen= 5,zeroline= False)
)
fig = dict(data = data, layout = layout)
iplot(fig)
# Number Of Terrorist Activities in Each state
state_attacks = GTDB_USA["State"].value_counts().to_frame()
state_attacks = state_attacks.rename(columns={'State':'count'})
state_attacks['state'] = state_attacks.index
state_attacks.reset_index(inplace=True)
state_attacks = state_attacks.drop(columns='index')
#state_attacks.drop([46], axis=0, inplace = True) # Drop 'US. Virgin Islands'
#state_attacks.drop([16], axis=0, inplace = True) # Drop 'District of Columba'
state_attacks
| count | state | |
|---|---|---|
| 0 | 579 | California |
| 1 | 499 | New York |
| 2 | 139 | Florida |
| 3 | 115 | Washington |
| 4 | 110 | Illinois |
| 5 | 89 | Texas |
| 6 | 83 | District of Columbia |
| 7 | 70 | Oregon |
| 8 | 64 | Massachusetts |
| 9 | 56 | Ohio |
| 10 | 50 | Michigan |
| 11 | 49 | Pennsylvania |
| 12 | 47 | Colorado |
| 13 | 46 | Missouri |
| 14 | 44 | Arizona |
| 15 | 44 | New Jersey |
| 16 | 41 | Georgia |
| 17 | 41 | Wisconsin |
| 18 | 41 | Virginia |
| 19 | 37 | Maryland |
| 20 | 34 | North Carolina |
| 21 | 34 | Tennessee |
| 22 | 28 | Minnesota |
| 23 | 28 | Louisiana |
| 24 | 25 | Indiana |
| 25 | 24 | Nevada |
| 26 | 24 | New Mexico |
| 27 | 22 | Nebraska |
| 28 | 22 | Iowa |
| 29 | 22 | Utah |
| 30 | 18 | Connecticut |
| 31 | 17 | Alabama |
| 32 | 15 | Oklahoma |
| 33 | 14 | Mississippi |
| 34 | 14 | Kansas |
| 35 | 12 | Idaho |
| 36 | 11 | New Hampshire |
| 37 | 11 | South Carolina |
| 38 | 9 | South Dakota |
| 39 | 8 | Delaware |
| 40 | 8 | Arkansas |
| 41 | 7 | Montana |
| 42 | 7 | Kentucky |
| 43 | 7 | North Dakota |
| 44 | 5 | Hawaii |
| 45 | 4 | Vermont |
| 46 | 3 | Wyoming |
| 47 | 2 | West Virginia |
| 48 | 2 | Rhode Island |
| 49 | 1 | Alaska |
| 50 | 1 | Maine |
# Number of Terrorist Attacks Per State
sns.barplot(x=GTDB_USA['State'].value_counts().values, y=GTDB_USA['State'].value_counts().index,palette=('viridis_r'))
plt.xticks(rotation=0)
fig=plt.gcf()
fig.set_size_inches(15,10)
plt.title('Number of Terrorist Attacks Per State')
plt.show()
California and New York have the highest number of terrorist attacks. This makes since as these are the 2 highest populated states. Later we will calculate the number of attacks per 100,000 people in each state, for a more accurate interpretation.
# The Most Active Terror Groups
sns.barplot(x=GTDB_USA['Group'].value_counts()[:10].values, y=GTDB_USA['Group'].value_counts()[:10].index,palette=('viridis_r'))
plt.xticks(rotation=0)
fig=plt.gcf()
fig.set_size_inches(20,10)
plt.title('Top 10 Most Active Terror Groups')
plt.show()
Top10Groups = GTDB_USA['Group'].value_counts()[:10].to_frame()
Top10Groups = Top10Groups.rename(columns={'Group':'count'})
Top10Groups['Group'] = Top10Groups.index
Top10Groups.reset_index(inplace=True)
Top10Groups = Top10Groups.drop(columns='index')
Top10Groups
| count | Group | |
|---|---|---|
| 0 | 510 | Unknown |
| 1 | 233 | Anti-Abortion extremists |
| 2 | 167 | Left-Wing Militants |
| 3 | 127 | White supremacists/nationalists |
| 4 | 88 | Fuerzas Armadas de Liberacion Nacional (FALN) |
| 5 | 79 | Black Nationalists |
| 6 | 70 | Animal Liberation Front (ALF) |
| 7 | 70 | Student Radicals |
| 8 | 65 | New World Liberation Front (NWLF) |
| 9 | 65 | Earth Liberation Front (ELF) |
fig = px.pie(Top10Groups, values='count', names='Group',
title='Top 10 Most Active Terror Groups',
hover_data=['Group'])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()
Most of the attacks are not affiliated with a group. However of the attacks that are, Anti-Abortion Extremists are the most prevalent.
# Successful attacks
#plt.subplots(figsize=(15,5))
sns.countplot(x=GTDB_USA["success"], data=GTDB_USA, palette='mako_r', order = GTDB_USA['success'].value_counts().index)
#plt.xticks(rotation=45)
plt.ylabel('Total')
plt.xlabel('Success')
plt.xticks([0,1], ['Successful','Unsuccessful'])
plt.title('Successful attacks')
plt.show()
GTDB_USA['success'].value_counts()
1 2223 0 460 Name: success, dtype: int64
TargetPlotData = GTDB_USA[['Target', 'Killed', 'Wounded']]
TargetPlotData
| Target | Killed | Wounded | |
|---|---|---|---|
| 0 | Police | 0.0 | 0.0 |
| 1 | Utilities | 0.0 | 0.0 |
| 2 | Military | 0.0 | 0.0 |
| 3 | Government (General) | 0.0 | 0.0 |
| 4 | Military | 0.0 | 0.0 |
| ... | ... | ... | ... |
| 3116 | Religious Figures/Institutions | 0.0 | 0.0 |
| 3117 | Religious Figures/Institutions | 1.0 | 0.0 |
| 3118 | Religious Figures/Institutions | 0.0 | 0.0 |
| 3119 | Business | 1.0 | 3.0 |
| 3120 | Religious Figures/Institutions | 0.0 | 0.0 |
2683 rows × 3 columns
TargetPlotData['Target'].value_counts()
Business 643 Private Citizens & Property 404 Government (General) 306 Abortion Related 256 Religious Figures/Institutions 230 Police 188 Educational Institution 169 Military 134 Government (Diplomatic) 112 Journalists & Media 55 Utilities 52 Airports & Aircraft 48 NGO 25 Transportation 16 Telecommunication 12 Tourists 10 Terrorists/Non-State Militia 9 Violent Political Party 6 Food or Water Supply 3 Maritime 3 Other 2 Name: Target, dtype: int64
# terrorist attack targets grouped in categories
target_codes = []
for attack in TargetPlotData['Target'].values:
if attack in ['Business', 'Journalists & Media', 'NGO']:
target_codes.append(1)
elif attack in ['Government (General)', 'Government (Diplomatic)']:
target_codes.append(2)
elif attack == 'Abortion Related':
target_codes.append(4)
elif attack == 'Educational Institution':
target_codes.append(5)
elif attack == 'Police':
target_codes.append(6)
elif attack == 'Military':
target_codes.append(7)
elif attack == 'Religious Figures/Institutions':
target_codes.append(8)
elif attack in ['Airports & Aircraft', 'Maritime', 'Transportation']:
target_codes.append(9)
elif attack in ['Food or Water Supply', 'Telecommunication', 'Utilities']:
target_codes.append(10)
else:
target_codes.append(3)
TargetPlotData['Target'] = target_codes
target_categories = ['Business', 'Government', 'Individuals', 'Healthcare', 'Education',
'Police', 'Military', 'Religion', 'Transportation', 'Infrastructure']
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/2308851232.py:26: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
TargetPlotData['Target'].value_counts()
1 723 3 431 2 418 4 256 8 230 6 188 5 169 7 134 10 67 9 67 Name: Target, dtype: int64
# terrorist attacks by target
target_count = np.asarray(TargetPlotData.groupby('Target').Target.count())
target_percent = np.round(target_count / sum(target_count) * 100, 2)
# terrorist attack fatalities by target
target_fatality = np.asarray(TargetPlotData.groupby('Target')['Killed'].sum())
target_yaxis = np.asarray([2.75, 3, 3.75, 1.1, 1.8, 2, 1.5, 1.75, 1.45])
# terrorist attack injuries by target
target_injury = np.asarray(TargetPlotData.groupby('Target')['Wounded'].sum())
target_xaxis = np.log10(target_injury)
target_text = []
for i in range(0, 9):
target_text.append(target_categories[i] + ' (' + target_percent[i].astype(str)
+ '%)<br>' + target_fatality[i].astype(str) + ' Killed, '
+ target_injury[i].astype(str) + ' Injured')
data = [go.Scatter(
x = target_injury,
y = target_fatality,
text = target_text,
mode = 'markers',
hoverinfo = 'text',
marker = dict(
size = target_count / 6.5,
opacity = 0.9,
color = 'rgb(240, 140, 45)')
)]
layout = go.Layout(
title = 'Terrorist Attacks by Target in United States (1970-2022)',
xaxis = dict(
title = 'Injuries',
type = 'log',
range = [1.36, 3.25],
tickmode = 'auto',
nticks = 2,
showline = True,
showgrid = False
),
yaxis = dict(
title = 'Fatalities',
type = 'log',
range = [0.59, 3.45],
tickmode = 'auto',
nticks = 4,
showline = True,
showgrid = False)
)
annotations = []
for i in range(0, 9):
annotations.append(dict(x=target_xaxis[i], y=target_yaxis[i],
xanchor='auto', yanchor='auto',
text=target_categories[i], showarrow=True))
layout['annotations'] = annotations
figure = dict(data = data, layout = layout)
iplot(figure)
WeaponPlotData = GTDB_USA[['Weapon', 'Killed', 'Wounded']]
WeaponPlotData
| Weapon | Killed | Wounded | |
|---|---|---|---|
| 0 | Firearms | 0.0 | 0.0 |
| 1 | Explosives | 0.0 | 0.0 |
| 2 | Incendiary | 0.0 | 0.0 |
| 3 | Incendiary | 0.0 | 0.0 |
| 4 | Explosives | 0.0 | 0.0 |
| ... | ... | ... | ... |
| 3116 | Incendiary | 0.0 | 0.0 |
| 3117 | Firearms | 1.0 | 0.0 |
| 3118 | Incendiary | 0.0 | 0.0 |
| 3119 | Explosives | 1.0 | 3.0 |
| 3120 | Incendiary | 0.0 | 0.0 |
2683 rows × 3 columns
WeaponPlotData['Weapon'].value_counts()
Explosives 1133 Incendiary 909 Firearms 445 Melee 66 Chemical 25 Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs) 25 Biological 23 Sabotage Equipment 18 Unknown 17 Other 17 Fake Weapons 4 Radiological 1 Name: Weapon, dtype: int64
# terrorist attack weapons grouped in categories
weapon_codes = []
for attack in WeaponPlotData['Weapon'].values:
if attack in ['Explosives', 'Sabotage Equipment']:
weapon_codes.append(1)
elif attack == 'Incendiary':
weapon_codes.append(2)
elif attack in ['Firearms', 'Fake Weapons']:
weapon_codes.append(3)
elif attack == 'Melee':
weapon_codes.append(5)
elif attack == 'Biological':
weapon_codes.append(6)
elif attack in ['Chemical', 'Radiological']:
weapon_codes.append(7)
elif 'Vehicle' in attack:
weapon_codes.append(8)
else:
weapon_codes.append(4)
WeaponPlotData['Weapon'] = weapon_codes
weapon_categories = ['Explosives', 'Flammables', 'Firearms', 'Miscellaneous',
'Knives', 'Bacteria/Viruses', 'Chemicals', 'Vehicles']
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/2231559974.py:22: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# terrorist attacks by weapon
weapon_count = np.asarray(WeaponPlotData.groupby('Weapon').Weapon.count())
weapon_percent = np.round(weapon_count / sum(weapon_count) * 100, 2)
# terrorist attack fatalities by weapon
weapon_fatality = np.asarray(WeaponPlotData.groupby('Weapon')['Killed'].sum())
weapon_yaxis = np.asarray([2, 1.75, 2.85, 0.85, 1.35, 0.83, 0.835, 3.45])
# terrorist attack injuries by weapon
weapon_injury = np.asarray(WeaponPlotData.groupby('Weapon')['Wounded'].sum())
weapon_xaxis = np.log10(weapon_injury)
weapon_text = []
for i in range(0, 8):
weapon_text.append(weapon_categories[i] + ' (' + weapon_percent[i].astype(str)
+ '%)<br>' + weapon_fatality[i].astype(str) + ' Killed, '
+ weapon_injury[i].astype(str) + ' Injured')
weapon_fatality[6] = 7
data = [go.Scatter(
x = weapon_injury,
y = weapon_fatality,
text = weapon_text,
mode = 'markers',
hoverinfo = 'text',
marker = dict(
size = (weapon_count + 50) / 10,
opacity = 0.9,
color = 'rgb(240, 140, 45)')
)]
layout = go.Layout(
title = 'Terrorist Attacks by Weapon in United States (1970-2022)',
xaxis = dict(
title = 'Injuries',
type = 'log',
range = [0.45, 3.51],
tickmode = 'auto',
nticks = 4,
showline = True,
showgrid = False
),
yaxis = dict(
title = 'Fatalities',
type = 'log',
range = [0.65, 3.33],
tickmode = 'auto',
nticks = 3,
showline = True,
showgrid = False)
)
annotations = []
for i in range(0, 8):
annotations.append(dict(x=weapon_xaxis[i], y=weapon_yaxis[i],
xanchor='auto', yanchor='auto',
text=weapon_categories[i], showarrow=True))
layout['annotations'] = annotations
figure = dict(data = data, layout = layout)
iplot(figure)
NOTE: Double click the above graph to zoom out.
terror_location = GTDB_USA_all[['iyear', 'imonth', 'iday', 'nkill', 'nwound', 'latitude', 'longitude']]
terror_location.rename(columns={'iyear':'Year','imonth':'Month','iday':'Day'},inplace=True)
terror_location['Day'][terror_location.Day == 0] = 1
terror_location['date'] = pd.to_datetime(terror_location[['Day', 'Month', 'Year']])
terror_location
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/1639713690.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/1639713690.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/1639713690.py:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| Year | Month | Day | nkill | nwound | latitude | longitude | date | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1970 | 1 | 1 | 0.0 | 0.0 | 37.005105 | -89.176269 | 1970-01-01 |
| 1 | 1970 | 1 | 2 | 0.0 | 0.0 | 37.791927 | -122.225906 | 1970-01-02 |
| 2 | 1970 | 1 | 2 | 0.0 | 0.0 | 43.076592 | -89.412488 | 1970-01-02 |
| 3 | 1970 | 1 | 3 | 0.0 | 0.0 | 43.072950 | -89.386694 | 1970-01-03 |
| 4 | 1970 | 1 | 1 | 0.0 | 0.0 | 43.468500 | -89.744299 | 1970-01-01 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3116 | 2020 | 12 | 13 | 0.0 | 0.0 | 42.115512 | -72.539521 | 2020-12-13 |
| 3117 | 2020 | 12 | 13 | 1.0 | 0.0 | 40.694257 | -73.930680 | 2020-12-13 |
| 3118 | 2020 | 12 | 15 | 0.0 | 0.0 | 42.115512 | -72.539521 | 2020-12-15 |
| 3119 | 2020 | 12 | 25 | 1.0 | 3.0 | 36.171469 | -86.784299 | 2020-12-25 |
| 3120 | 2020 | 12 | 28 | 0.0 | 0.0 | 42.115512 | -72.539521 | 2020-12-28 |
3121 rows × 8 columns
terror_location['text'] = terror_location['date'].dt.strftime('%B %-d, %Y') + '<br>' +\
terror_location['nkill'].astype(str) + ' Killed, ' +\
terror_location['nwound'].astype(str) + ' Injured'
fatality = dict(
type = 'scattergeo',
locationmode = 'USA-states',
lon = terror_location[terror_location.nkill > 0]['longitude'],
lat = terror_location[terror_location.nkill > 0]['latitude'],
text = terror_location[terror_location.nkill > 0]['text'],
mode = 'markers',
name = 'Fatalities',
hoverinfo = 'text+name',
marker = dict(
size = terror_location[terror_location.nkill > 0]['nkill'] ** 0.255 * 8,
opacity = 0.95,
color = 'rgb(240, 140, 45)')
)
injury = dict(
type = 'scattergeo',
locationmode = 'USA-states',
lon = terror_location[terror_location.nkill == 0]['longitude'],
lat = terror_location[terror_location.nkill == 0]['latitude'],
text = terror_location[terror_location.nkill == 0]['text'],
mode = 'markers',
name = 'Injuries',
hoverinfo = 'text+name',
marker = dict(
#size = (terror_location[terror_location.nkill == 0]['nwound'] + 1) ** 0.245 * 8,
opacity = 0.85,
color = 'rgb(20, 150, 187)')
)
layout = dict(
title = 'Terrorist Attacks by Latitude/Longitude in United States (1970-2022)',
showlegend = True,
legend = dict(
x = 0.85, y = 0.4
),
geo = dict(
scope = 'usa',
projection = dict(type = 'albers usa'),
showland = True,
landcolor = 'rgb(250, 250, 250)',
subunitwidth = 1,
subunitcolor = 'rgb(217, 217, 217)',
countrywidth = 1,
countrycolor = 'rgb(217, 217, 217)',
showlakes = True,
lakecolor = 'rgb(255, 255, 255)')
)
data = [fatality, injury]
figure = dict(data = data, layout = layout)
iplot(figure)
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/3975754310.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
us_states = np.asarray(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC', 'FL', 'GA',
'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'])
# state population estimates for 2022 from US Census Bureau
state_population = np.asarray([5073187, 738023, 7303398, 3030646, 39995077, 5922618,
3612314, 1008350, 707109, 22085563, 10916760, 1474265,
1893410, 12808884, 6845874, 3219171, 2954832, 4539130,
4682633, 1369159, 6257958, 7126375, 10116069, 5787008,
2960075, 6188111, 1103187, 1988536, 3185426, 1389741,
9388414, 2129190, 20365879, 10620168, 800394, 11852036,
4000953, 4318492, 13062764, 1106341, 5217037, 901165,
7023788, 29945493, 3373162, 646545, 8757467, 7901429,
1781860, 5935064, 579495])
# terrorist attacks per 100,000 people in state
terror_perstate = np.asarray(GTDB_USA.groupby('State').State.count())
terror_percapita = np.round(terror_perstate / state_population * 100000, 2)
# District of Columbia outlier (1 terrorist attack per 10,000 people) adjusted
terror_percapita[8] = round(terror_percapita[8] / 6, 2)
terror_scale = [[0, 'rgb(252, 232, 213)'], [1, 'rgb(240, 140, 45)']]
data = [dict(
type = 'choropleth',
autocolorscale = False,
colorscale = terror_scale,
showscale = False,
locations = us_states,
locationmode = 'USA-states',
z = terror_percapita,
marker = dict(
line = dict(
color = 'rgb(255, 255, 255)',
width = 2)
)
)]
layout = dict(
title = 'Terrorist Attacks per 100,000 People in United States (1970-2022)',
geo = dict(
scope = 'usa',
projection = dict(type = 'albers usa'),
countrycolor = 'rgb(255, 255, 255)',
showlakes = True,
lakecolor = 'rgb(255, 255, 255)')
)
figure = dict(data = data, layout = layout)
iplot(figure)
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import chi2, SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
textData = GTDB_USA_all[['motive', 'attacktype1_txt', 'targtype1_txt']]
textData
| motive | attacktype1_txt | targtype1_txt | |
|---|---|---|---|
| 0 | To protest the Cairo Illinois Police Deparment | Armed Assault | Police |
| 1 | NaN | Bombing/Explosion | Utilities |
| 2 | To protest the War in Vietnam and the draft | Facility/Infrastructure Attack | Military |
| 3 | To protest the War in Vietnam and the draft | Facility/Infrastructure Attack | Government (General) |
| 4 | NaN | Bombing/Explosion | Military |
| ... | ... | ... | ... |
| 3116 | The specific motive is unknown; however, sourc... | Facility/Infrastructure Attack | Religious Figures/Institutions |
| 3117 | Luis Vasquez, an unaffiliated individual, clai... | Hostage Taking (Kidnapping) | Religious Figures/Institutions |
| 3118 | The specific motive is unknown; however, sourc... | Facility/Infrastructure Attack | Religious Figures/Institutions |
| 3119 | The specific motive is unknown; however, sourc... | Bombing/Explosion | Business |
| 3120 | The specific motive is unknown; however, sourc... | Facility/Infrastructure Attack | Religious Figures/Institutions |
3121 rows × 3 columns
textData.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3121 entries, 0 to 3120 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 motive 1691 non-null object 1 attacktype1_txt 3121 non-null object 2 targtype1_txt 3121 non-null object dtypes: object(3) memory usage: 73.3+ KB
textData = textData.dropna()
textData.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 1691 entries, 0 to 3120 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 motive 1691 non-null object 1 attacktype1_txt 1691 non-null object 2 targtype1_txt 1691 non-null object dtypes: object(3) memory usage: 52.8+ KB
textData = textData[textData.motive.isin(['nan', 'Unknown']) == False]
textData
| motive | attacktype1_txt | targtype1_txt | |
|---|---|---|---|
| 0 | To protest the Cairo Illinois Police Deparment | Armed Assault | Police |
| 2 | To protest the War in Vietnam and the draft | Facility/Infrastructure Attack | Military |
| 3 | To protest the War in Vietnam and the draft | Facility/Infrastructure Attack | Government (General) |
| 5 | Protest the draft and Vietnam War | Facility/Infrastructure Attack | Military |
| 7 | To protest United States owned businesses in P... | Facility/Infrastructure Attack | Business |
| ... | ... | ... | ... |
| 3116 | The specific motive is unknown; however, sourc... | Facility/Infrastructure Attack | Religious Figures/Institutions |
| 3117 | Luis Vasquez, an unaffiliated individual, clai... | Hostage Taking (Kidnapping) | Religious Figures/Institutions |
| 3118 | The specific motive is unknown; however, sourc... | Facility/Infrastructure Attack | Religious Figures/Institutions |
| 3119 | The specific motive is unknown; however, sourc... | Bombing/Explosion | Business |
| 3120 | The specific motive is unknown; however, sourc... | Facility/Infrastructure Attack | Religious Figures/Institutions |
1514 rows × 3 columns
Initialize TfidfVectorizer with desired parameters (default smoothing and normalization)
tfidf_vectorizer = TfidfVectorizer(analyzer='word',stop_words= 'english')
The feature that has the text data we want to work with is 'motive'. We will use that to TfidfVectorizer. Using the fit_transform() method to learn vocabulary and idf. This method returns document-term matrix.
tfidf_vector = tfidf_vectorizer.fit_transform(textData['motive'])
tfidf_vector
<1514x3305 sparse matrix of type '<class 'numpy.float64'>' with 18514 stored elements in Compressed Sparse Row format>
# How many words did we count?
len(tfidf_vectorizer.get_feature_names())
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.
3305
# list the words
tfidf_vectorizer.get_feature_names()[:10] # listing the first 10 only
['000', '10', '100', '11', '11th', '150', '1619', '17', '18', '19']
Make a DataFrame out of the resulting tf–idf vector, setting the “feature names” or words as columns and the titles as rows
tfidf_df = pd.DataFrame(tfidf_vector.toarray(), columns=tfidf_vectorizer.get_feature_names())
tfidf_df
| 000 | 10 | 100 | 11 | 11th | 150 | 1619 | 17 | 18 | 19 | ... | yugoslavia | zaremski | zealand | zebra | zeldin | zero | zhang | zionist | zionists | zuniga | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1509 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1510 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1511 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1512 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1513 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
1514 rows × 3305 columns
It is clear that we have a lot of features now. But if we take a closer look at our matrix we can easily tell that it is very sparse with many 0 values.
Let's create some functions to help us with our analysis keeping in mind when we say document here were are referring to an single entry(event) in our dataset.
This function takes a single row of the tf-idf matrix (corresponding to a particular document), and returns the n highest scoring words (or more generally tokens or features):
# Get top n tfidf values in row and return them with their corresponding feature names.
def top_tfidf_feats(row, features, top_n=25):
topn_ids = np.argsort(row)[::-1][:top_n]
top_feats = [(features[i], row[i]) for i in topn_ids]
df = pd.DataFrame(top_feats)
df.columns = ['feature', 'tfidf']
return df
Here we use argsort to produce the indices that would order the row by tf-idf value, reverse them (into descending order), and select the first top_n. We then return a pandas DataFrame with the words themselves (feature names) and their corresponding score.
The result of a tf-idf, however, is typically a sparse matrix, which doesn’t support all the usual matrix or array operations. So in order to apply the above function to inspect a particular document, we convert a single row into dense format first:
# Top tfidf features in specific document (matrix row)
def top_feats_in_doc(Xtr, features, row_id, top_n=25):
row = np.squeeze(Xtr[row_id].toarray())
return top_tfidf_feats(row, features, top_n)
For example, we can look at the first document we have and use this function to show the top 10 words used in the first document of our matrix.
textData.iloc[0]['motive']
'To protest the Cairo Illinois Police Deparment'
top_feats_in_doc(tfidf_vector,tfidf_vectorizer.get_feature_names(),0,10)
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.
| feature | tfidf | |
|---|---|---|
| 0 | deparment | 0.583594 |
| 1 | cairo | 0.583594 |
| 2 | illinois | 0.443474 |
| 3 | police | 0.307955 |
| 4 | protest | 0.165317 |
| 5 | faculty | 0.000000 |
| 6 | far | 0.000000 |
| 7 | family | 0.000000 |
| 8 | families | 0.000000 |
| 9 | falsely | 0.000000 |
Calculate the average tf-idf score of all words across a number of documents (in this case all documents), i.e. the average per column of a tf-idf matrix:
def top_mean_feats(Xtr, features, grp_ids=None, min_tfidf=0.1, top_n=25):
if grp_ids:
D = Xtr[grp_ids].toarray()
else:
D = Xtr.toarray()
D[D < min_tfidf] = 0
tfidf_means = np.mean(D, axis=0)
return top_tfidf_feats(tfidf_means, features, top_n)
Calling this function with grp_ids=None, gives us the most important words across the whole dataset. Here are the top 10:
top_mean_feats(tfidf_vector,tfidf_vectorizer.get_feature_names(),top_n=15)
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.
| feature | tfidf | |
|---|---|---|
| 0 | protest | 0.067913 |
| 1 | abortion | 0.063500 |
| 2 | sabotage | 0.044678 |
| 3 | motive | 0.032757 |
| 4 | practice | 0.032416 |
| 5 | facilities | 0.031839 |
| 6 | operating | 0.031658 |
| 7 | unknown | 0.030949 |
| 8 | specific | 0.030446 |
| 9 | attack | 0.028394 |
| 10 | draft | 0.025381 |
| 11 | united | 0.022003 |
| 12 | police | 0.020938 |
| 13 | states | 0.020117 |
| 14 | war | 0.018931 |
let’s calculate the mean tf-idf scores depending on a event type label. This function returns a list of dataframes, where each dataframe holds top_n features and their mean tfidf value calculated across rows(events) with the same class label.
def top_feats_by_class(Xtr, y, features, min_tfidf=0.1, top_n=25):
dfs = []
labels = np.unique(y)
for label in labels:
ids = np.where(y==label)
feats_df = top_mean_feats(Xtr, features, ids, min_tfidf=min_tfidf, top_n=top_n)
feats_df.label = label
dfs.append(feats_df)
return dfs
dfs = top_feats_by_class(tfidf_vector,textData['attacktype1_txt'],tfidf_vectorizer.get_feature_names())
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function get_feature_names is deprecated; get_feature_names is deprecated in 1.0 and will be removed in 1.2. Please use get_feature_names_out instead.
Instead of printing the features out as a table, let’s create a figure in matplotlib:
# Plot the data frames returned by the function plot_tfidf_classfeats().
def plot_tfidf_classfeats_h(dfs):
fig = plt.figure(figsize=(45, 25), facecolor="w")
x = np.arange(len(dfs[0]))
for i, df in enumerate(dfs):
ax = fig.add_subplot(2, 4, i+1)
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.set_frame_on(False)
ax.get_xaxis().tick_bottom()
ax.get_yaxis().tick_left()
ax.set_xlabel("Mean Tf-Idf Score", labelpad=16, fontsize=14)
ax.set_title("label = " + str(df.label), fontsize=16)
ax.ticklabel_format(axis='x', style='sci', scilimits=(-2,2))
ax.barh(x, df.tfidf, align='center', color='#005035')
ax.set_yticks(x)
ax.set_ylim([-1, x[-1]+1])
yticks = ax.set_yticklabels(df.feature)
plt.subplots_adjust(bottom=0.09, right=0.97, left=0.15, top=0.95, wspace=0.52)
plt.show()
plot_tfidf_classfeats_h(dfs)
#dfs2 = top_feats_by_class(tfidf_vector,textData['targtype1_txt'],tfidf_vectorizer.get_feature_names())
#plot_tfidf_classfeats_h(dfs2)
Model_data = GTDB_USA[['Month', 'State', 'Attack_Type', 'Target_type', 'Weapon_type', 'success']]
Model_data.head()
| Month | State | Attack_Type | Target_type | Weapon_type | success | |
|---|---|---|---|---|---|---|
| 0 | 1 | Illinois | 2 | 3 | 5 | 1 |
| 1 | 1 | California | 3 | 21 | 6 | 1 |
| 2 | 1 | Wisconsin | 7 | 4 | 8 | 1 |
| 3 | 1 | Wisconsin | 7 | 2 | 8 | 1 |
| 4 | 1 | Wisconsin | 3 | 4 | 6 | 0 |
# create dummy fields before split
# converting categorical features using one-hot encoding (i.e., dummy features)
# textbook approach using statsmodels categorical function is depricated
# we will be using pandas get_dummies function instead
Model_data = pd.get_dummies(Model_data, columns=['State'], prefix = ['dummy'])
print(Model_data.columns)
Index(['Month', 'Attack_Type', 'Target_type', 'Weapon_type', 'success',
'dummy_Alabama', 'dummy_Alaska', 'dummy_Arizona', 'dummy_Arkansas',
'dummy_California', 'dummy_Colorado', 'dummy_Connecticut',
'dummy_Delaware', 'dummy_District of Columbia', 'dummy_Florida',
'dummy_Georgia', 'dummy_Hawaii', 'dummy_Idaho', 'dummy_Illinois',
'dummy_Indiana', 'dummy_Iowa', 'dummy_Kansas', 'dummy_Kentucky',
'dummy_Louisiana', 'dummy_Maine', 'dummy_Maryland',
'dummy_Massachusetts', 'dummy_Michigan', 'dummy_Minnesota',
'dummy_Mississippi', 'dummy_Missouri', 'dummy_Montana',
'dummy_Nebraska', 'dummy_Nevada', 'dummy_New Hampshire',
'dummy_New Jersey', 'dummy_New Mexico', 'dummy_New York',
'dummy_North Carolina', 'dummy_North Dakota', 'dummy_Ohio',
'dummy_Oklahoma', 'dummy_Oregon', 'dummy_Pennsylvania',
'dummy_Rhode Island', 'dummy_South Carolina', 'dummy_South Dakota',
'dummy_Tennessee', 'dummy_Texas', 'dummy_Utah', 'dummy_Vermont',
'dummy_Virginia', 'dummy_Washington', 'dummy_West Virginia',
'dummy_Wisconsin', 'dummy_Wyoming'],
dtype='object')
Model_data.head()
| Month | Attack_Type | Target_type | Weapon_type | success | dummy_Alabama | dummy_Alaska | dummy_Arizona | dummy_Arkansas | dummy_California | ... | dummy_South Dakota | dummy_Tennessee | dummy_Texas | dummy_Utah | dummy_Vermont | dummy_Virginia | dummy_Washington | dummy_West Virginia | dummy_Wisconsin | dummy_Wyoming | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2 | 3 | 5 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 1 | 3 | 21 | 6 | 1 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 1 | 7 | 4 | 8 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 1 | 7 | 2 | 8 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 1 | 3 | 4 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
5 rows × 56 columns
# create training and testing sets at 80/20
Model_data_train, Model_data_test = train_test_split(Model_data, test_size = .2)
# for learning our model we want all columns except 'success'
x = Model_data.drop('success', axis=1)
x_test = Model_data.drop('success', axis=1)
# we want to predict the success let's make that our y
y = Model_data['success']
y_test = Model_data['success']
SMOTE or Synthetic Minority Oversampling Technique is an oversampling technique but SMOTE working differently than your typical oversampling.
In a classic oversampling technique, the minority data is duplicated from the minority data population. While it increases the number of data, it does not give any new information or variation to the machine learning model.
SMOTE works by utilizing a k-nearest neighbor algorithm to create synthetic data. SMOTE first start by choosing random data from the minority class, then k-nearest neighbors from the data are set. Synthetic data would then be made between the random data and the randomly selected k-nearest neighbor.
sns.set_theme(style="darkgrid")
sns.countplot(y=y, data=Model_data, palette="mako_r")
plt.ylabel('Success')
plt.xlabel('Total')
plt.yticks([0,1], ['Unsuccessful','Successful'])
plt.title('Unbalanced Data')
plt.show()
Model_data['success'].value_counts()
1 2223 0 460 Name: success, dtype: int64
This graph shows us that our predictor success is heavily skewed towards successful attacks. We have 2223 successful attacks and 460 unsuccessful attacks. In an effort to increase the accuracy of our models we can apply the Smote Balancing technique.
# Over Sample using SMOTE
from imblearn.over_sampling import SMOTE
x, y = SMOTE(k_neighbors=1).fit_resample(x, y)
To run models using under sampling using Cluster Centroids comment out the above SMOTE cell and run the cell below.
# Under sample using Cluster Centroids
#from imblearn.under_sampling import ClusterCentroids
#x, y = ClusterCentroids(random_state=42).fit_resample(x, y)
sns.set_theme(style="darkgrid")
sns.countplot(y=y, data=Model_data, palette="mako_r")
plt.ylabel('Success')
plt.xlabel('Total')
plt.yticks([0,1], ['Unsuccessful','Successful'])
plt.title('Balanced Data')
plt.show()
This shows us that the training set has been balanced to the distribution of successful attacks. After balancing the data approximately 79.3% of the unsuccessful attacks are synthetic data created by the SMOTE Technique, this may result in less accurate models later.
The SimpleClassifier first tries several baseline and instantaneous models, potentially on subsampled data, to get an idea of what a low baseline should be. This again is a good place to surface data leakage, as well as find the main discriminative features in the dataset. https://dabl.github.io/0.2.0/index.html#
#!pip install dabl
import dabl
BaseModel = dabl.SimpleClassifier(random_state=0).fit(Model_data, target_col="success")
Running DummyClassifier(random_state=0)
accuracy: 0.829 average_precision: 0.171 roc_auc: 0.500 recall_macro: 0.500 f1_macro: 0.453
=== new best DummyClassifier(random_state=0) (using recall_macro):
accuracy: 0.829 average_precision: 0.171 roc_auc: 0.500 recall_macro: 0.500 f1_macro: 0.453
Running GaussianNB()
accuracy: 0.245 average_precision: 0.237 roc_auc: 0.676 recall_macro: 0.532 f1_macro: 0.238
=== new best GaussianNB() (using recall_macro):
accuracy: 0.245 average_precision: 0.237 roc_auc: 0.676 recall_macro: 0.532 f1_macro: 0.238
Running MultinomialNB()
accuracy: 0.817 average_precision: 0.366 roc_auc: 0.698 recall_macro: 0.565 f1_macro: 0.574
=== new best MultinomialNB() (using recall_macro):
accuracy: 0.817 average_precision: 0.366 roc_auc: 0.698 recall_macro: 0.565 f1_macro: 0.574
Running DecisionTreeClassifier(class_weight='balanced', max_depth=1, random_state=0)
accuracy: 0.579 average_precision: 0.212 roc_auc: 0.600 recall_macro: 0.600 f1_macro: 0.514
=== new best DecisionTreeClassifier(class_weight='balanced', max_depth=1, random_state=0) (using recall_macro):
accuracy: 0.579 average_precision: 0.212 roc_auc: 0.600 recall_macro: 0.600 f1_macro: 0.514
Running DecisionTreeClassifier(class_weight='balanced', max_depth=5, random_state=0)
accuracy: 0.624 average_precision: 0.296 roc_auc: 0.660 recall_macro: 0.635 f1_macro: 0.552
=== new best DecisionTreeClassifier(class_weight='balanced', max_depth=5, random_state=0) (using recall_macro):
accuracy: 0.624 average_precision: 0.296 roc_auc: 0.660 recall_macro: 0.635 f1_macro: 0.552
Running DecisionTreeClassifier(class_weight='balanced', min_impurity_decrease=0.01,
random_state=0)
accuracy: 0.594 average_precision: 0.239 roc_auc: 0.646 recall_macro: 0.628 f1_macro: 0.531
Running LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000,
random_state=0)
accuracy: 0.643 average_precision: 0.357 roc_auc: 0.299 recall_macro: 0.657 f1_macro: 0.571
=== new best LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000,
random_state=0) (using recall_macro):
accuracy: 0.643 average_precision: 0.357 roc_auc: 0.299 recall_macro: 0.657 f1_macro: 0.571
Running LogisticRegression(C=1, class_weight='balanced', max_iter=1000, random_state=0)
accuracy: 0.657 average_precision: 0.369 roc_auc: 0.303 recall_macro: 0.656 f1_macro: 0.578
Best model:
LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000,
random_state=0)
Best Scores:
accuracy: 0.643 average_precision: 0.357 roc_auc: 0.299 recall_macro: 0.657 f1_macro: 0.571
Baseline Results
Best model: LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000, random_state=0) Best Scores: Accuracy: 0.643, Average_precision: 0.357, roc_auc: 0.299, recall_macro: 0.657, f1_macro: 0.571
dabl.explain(BaseModel)
Naive Bayes is a probabilistic classifier inspired by the Bayes theorem under a simple assumption which is the attributes are conditionally independent.
The classification is conducted by deriving the maximum posterior which is the maximal P(Ci|X) with the above assumption applying to Bayes theorem. This assumption greatly reduces the computational cost by only counting the class distribution. Even though the assumption is not valid in most cases since the attributes are dependent, surprisingly Naive Bayes has able to perform impressively.
Naive Bayes is a very simple algorithm to implement and good results have obtained in most cases. It can be easily scalable to larger datasets since it takes linear time, rather than by expensive iterative approximation as used for many other types of classifiers.
Naive Bayes can suffer from a problem called the zero probability problem. When the conditional probability is zero for a particular attribute, it fails to give a valid prediction. This needs to be fixed explicitly using a Laplacian estimator.
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
# Let's try a Naive Bayes classifier
nb_model = MultinomialNB()
nb_model.fit(x, y)
y_pred = nb_model.predict(x_test)
# Summary of the predictions made by the classifier
print(classification_report(y_test, y_pred))
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred))
# Accuracy score
NBAcc = accuracy_score(y_pred,y_test)
print('accuracy is', NBAcc)
test_predictions_proba = nb_model.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)
# If our target is a numerical value we can also check MSE and RMSE.
# NOTE - This will not work here and cause an error since our pedictions are categorical
# mean_squared_error = mean_squared_error(y_test, predictions)
# mean_absolute_error = mean_absolute_error(y_test, predictions)
# result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error}
precision recall f1-score support
0 0.22 0.62 0.33 460
1 0.88 0.56 0.68 2223
accuracy 0.57 2683
macro avg 0.55 0.59 0.51 2683
weighted avg 0.76 0.57 0.62 2683
Confusion Matrix
[[ 284 176]
[ 979 1244]]
accuracy is 0.569511740588893
Log Loss: 0.6457
==============================
# contingency table of the actual and predicted score text
y_predicted = nb_model.predict(x_test)
ypred = pd.crosstab(y_test, y_predicted, rownames = ['Actual'], colnames = ['Predicted'])
ypred['Total'] = ypred.sum(axis=1)
ypred.loc['Total'] = ypred.sum()
ypred
| Predicted | 0 | 1 | Total |
|---|---|---|---|
| Actual | |||
| 0 | 284 | 176 | 460 |
| 1 | 979 | 1244 | 2223 |
| Total | 1263 | 1420 | 2683 |
from sklearn.metrics import plot_confusion_matrix
plot_confusion_matrix(nb_model, x_test, y_test)
plt.grid(visible=False)
plt.show()
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function plot_confusion_matrix is deprecated; Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.
# Native Bayes using Grid Search
from sklearn.model_selection import GridSearchCV
params = {'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]}
multinomial_nb_grid = GridSearchCV(MultinomialNB(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
multinomial_nb_grid.fit(x,y)
print('Train Accuracy : %.3f'%multinomial_nb_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%multinomial_nb_grid.best_estimator_.score(x_test, y_test))
GS_NBAcc = multinomial_nb_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_NBAcc)
print('Best Parameters : ',multinomial_nb_grid.best_params_)
Fitting 10 folds for each of 5 candidates, totalling 50 fits
Train Accuracy : 0.671
Test Accuracy : 0.571
Best Accuracy Through Grid Search : 0.645
Best Parameters : {'alpha': 0.01}
Decision Trees observes features of an object and trains a model in the structure of a tree to predict data in the future to produce meaningful continuous output. Continuous output means that the output/result is not discrete, i.e., it is not represented just by a discrete, known set of numbers or values
# Let's try a decision tree model
from sklearn.tree import DecisionTreeClassifier
dt01 = DecisionTreeClassifier(criterion="gini").fit(x,y)
prediction_dt01 = dt01.predict(x_test)
# Summary of the predictions made by the classifier
print(classification_report(y_test, prediction_dt01))
print('Confusion Matrix')
print(confusion_matrix(y_test, prediction_dt01))
# Accuracy score
DTAcc = accuracy_score(prediction_dt01,y_test)
print('accuracy is',DTAcc)
test_predictions_proba = dt01.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)
precision recall f1-score support
0 0.71 0.92 0.80 460
1 0.98 0.92 0.95 2223
accuracy 0.92 2683
macro avg 0.85 0.92 0.88 2683
weighted avg 0.94 0.92 0.93 2683
Confusion Matrix
[[ 424 36]
[ 174 2049]]
accuracy is 0.9217294073797987
Log Loss: 0.1326
==============================
# contingency table of the actual and predicted score text
y_predicted = dt01.predict(x_test)
ypred = pd.crosstab(y_test, y_predicted, rownames = ['Actual'], colnames = ['Predicted'])
ypred['Total'] = ypred.sum(axis=1)
ypred.loc['Total'] = ypred.sum()
ypred
| Predicted | 0 | 1 | Total |
|---|---|---|---|
| Actual | |||
| 0 | 424 | 36 | 460 |
| 1 | 174 | 2049 | 2223 |
| Total | 598 | 2085 | 2683 |
Feature Importance of Decision Tree
fi = dt01.feature_importances_ #feature importance array
fi = pd.Series(data = fi, index = x.columns) #convert to Pandas series for plotting
fi.sort_values(ascending=False, inplace=True) #sort descending
#create bar plot
plt.figure(figsize=(20, 20))
chart = sns.barplot(x=fi, y=fi.index, palette=sns.color_palette("viridis_r", n_colors=len(fi)))
chart.set_xticklabels(chart.get_xticklabels(), rotation=45, horizontalalignment='right')
plt.show()
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/2019358956.py:4: UserWarning: FixedFormatter should only be used together with FixedLocator
# Decision Tree using Grid Search
params = {'max_depth': [3, 5, 10, 20, 30],
'criterion': ["gini", "entropy"],
'splitter': ["best", "random"]}
dt_grid = GridSearchCV(DecisionTreeClassifier(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
dt_grid.fit(x,y)
print('Train Accuracy : %.3f'%dt_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%dt_grid.best_estimator_.score(x_test, y_test))
GS_DTAcc = dt_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_DTAcc)
print('Best Parameters : ',dt_grid.best_params_)
Fitting 10 folds for each of 20 candidates, totalling 200 fits
Train Accuracy : 0.928
Test Accuracy : 0.892
Best Accuracy Through Grid Search : 0.781
Best Parameters : {'criterion': 'gini', 'max_depth': 30, 'splitter': 'best'}
Random forest is a commonly-used machine learning algorithm, which combines the output of multiple decision trees to reach a single result. Its ease of use and flexibility have fueled its adoption, as it handles both classification and regression problems.
# Let's try a Random Forest model
from sklearn.ensemble import RandomForestClassifier
rf01 = RandomForestClassifier(n_estimators = 100,criterion="gini").fit(x,y)
prediction_rf01 = rf01.predict(x_test)
# Summary of the predictions made by the classifier
print(classification_report(y_test, prediction_rf01))
print('Confusion Matrix')
print(confusion_matrix(y_test, prediction_rf01))
# Accuracy score
RFAcc = accuracy_score(prediction_rf01,y_test)
print('accuracy is', RFAcc)
test_predictions_proba = rf01.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)
precision recall f1-score support
0 0.73 0.88 0.80 460
1 0.97 0.93 0.95 2223
accuracy 0.92 2683
macro avg 0.85 0.91 0.87 2683
weighted avg 0.93 0.92 0.93 2683
Confusion Matrix
[[ 406 54]
[ 153 2070]]
accuracy is 0.9228475587029444
Log Loss: 0.2031
==============================
# contingency table of the actual and predicted score text
y_predicted = rf01.predict(x_test)
ypred = pd.crosstab(y_test, y_predicted, rownames = ['Actual'], colnames = ['Predicted'])
ypred['Total'] = ypred.sum(axis=1)
ypred.loc['Total'] = ypred.sum()
ypred
| Predicted | 0 | 1 | Total |
|---|---|---|---|
| Actual | |||
| 0 | 406 | 54 | 460 |
| 1 | 153 | 2070 | 2223 |
| Total | 559 | 2124 | 2683 |
def plot_confusion_matrix(Y_test, Y_preds):
conf_mat = confusion_matrix(Y_test, Y_preds)
#print(conf_mat)
fig = plt.figure(figsize=(6,6))
plt.matshow(conf_mat, cmap=plt.cm.Blues, fignum=1)
plt.yticks(range(2), range(2))
plt.xticks(range(2), range(2))
plt.colorbar()
plt.grid(visible=False);
for i in range(2):
for j in range(2):
plt.text(i-0.2,j+0.1, str(conf_mat[j, i]), color='tab:red')
plot_confusion_matrix(y_test, rf01.predict(x_test))
/var/folders/24/536gs7r91qzd964t2ppqhs6m0000gn/T/ipykernel_88804/3873633094.py:8: MatplotlibDeprecationWarning: Auto-removal of grids by pcolor() and pcolormesh() is deprecated since 3.5 and will be removed two minor releases later; please call grid(False) first.
# Random Forrest using Grid Search
params = {'max_depth': [3, 5, 10, 20, 30],
'n_estimators' : [10, 100],
'criterion': ["gini", "entropy", "log_loss"]}
rf_grid = GridSearchCV(RandomForestClassifier(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
rf_grid.fit(x,y)
print('Train Accuracy : %.3f'%rf_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%rf_grid.best_estimator_.score(x_test, y_test))
GS_RFAcc = rf_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_RFAcc)
print('Best Parameters : ',rf_grid.best_params_)
Fitting 10 folds for each of 30 candidates, totalling 300 fits
Train Accuracy : 0.937
Test Accuracy : 0.907
Best Accuracy Through Grid Search : 0.809
Best Parameters : {'criterion': 'gini', 'max_depth': 30, 'n_estimators': 100}
Support Vector Machine, SVM, can be used for both regression and classification tasks. But, it is widely used in classification objectives. The objective of the support vector machine algorithm is to find a hyperplane in an N-dimensional space(N — the number of features) that distinctly classifies the data points.
To separate the two classes of data points, there are many possible hyperplanes that could be chosen. Our objective is to find a plane that has the maximum margin, i.e the maximum distance between data points of both classes. Maximizing the margin distance provides some reinforcement so that future data points can be classified with more confidence.
# Let's try a Support Vector Machine model
from sklearn import svm
svm_model = svm.SVC(decision_function_shape='ovo', probability=True).fit(x,y)
prediction_svm = svm_model.predict(x_test)
# Summary of the predictions made by the classifier
print(classification_report(y_test, prediction_svm))
print('Confusion Matrix')
print(confusion_matrix(y_test, prediction_svm))
# Accuracy score
SVMAcc = accuracy_score(prediction_svm,y_test)
print('accuracy is', SVMAcc)
test_predictions_proba = svm_model.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)
precision recall f1-score support
0 0.22 0.74 0.34 460
1 0.90 0.47 0.62 2223
accuracy 0.52 2683
macro avg 0.56 0.61 0.48 2683
weighted avg 0.78 0.52 0.57 2683
Confusion Matrix
[[ 342 118]
[1181 1042]]
accuracy is 0.5158404770778979
Log Loss: 0.6491
==============================
# contingency table of the actual and predicted score text
y_predicted = svm_model.predict(x_test)
ypred = pd.crosstab(y_test, y_predicted, rownames = ['Actual'], colnames = ['Predicted'])
ypred['Total'] = ypred.sum(axis=1)
ypred.loc['Total'] = ypred.sum()
ypred
| Predicted | 0 | 1 | Total |
|---|---|---|---|
| Actual | |||
| 0 | 342 | 118 | 460 |
| 1 | 1181 | 1042 | 2223 |
| Total | 1523 | 1160 | 2683 |
# SVM using Grid Search
params = {'kernel' : ['linear', 'poly', 'rbf', 'sigmoid'],
'decision_function_shape' : ['ovr', 'ovo']}
svm_grid = GridSearchCV(svm.SVC(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
svm_grid.fit(x,y)
print('Train Accuracy : %.3f'%svm_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%svm_grid.best_estimator_.score(x_test, y_test))
GS_SVMAcc = svm_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_SVMAcc)
print('Best Parameters : ',svm_grid.best_params_)
Fitting 10 folds for each of 8 candidates, totalling 80 fits
Train Accuracy : 0.711
Test Accuracy : 0.717
Best Accuracy Through Grid Search : 0.694
Best Parameters : {'decision_function_shape': 'ovr', 'kernel': 'linear'}
Multi-layer Perceptron (MLP) is a supervised learning algorithm that learns a function by training on a dataset. Given a set of features and a target , it can learn a non-linear function approximator for either classification or regression. It is different from logistic regression, in that between the input and the output layer, there can be one or more non-linear layers, called hidden layers.
# Let's try a Neural Network model
from sklearn.neural_network import MLPClassifier
NeuralNetwork_model = MLPClassifier(solver='lbfgs', alpha=1e-5, max_iter=1000, hidden_layer_sizes=(5, 2), random_state=1).fit(x,y)
prediction_NeuralNetwork = NeuralNetwork_model.predict(x_test)
# Summary of the predictions made by the classifier
print(classification_report(y_test, prediction_NeuralNetwork))
print('Confusion Matrix')
print(confusion_matrix(y_test, prediction_NeuralNetwork))
# Accuracy score
NNAcc = accuracy_score(prediction_NeuralNetwork,y_test)
print('accuracy is', NNAcc)
test_predictions_proba = NeuralNetwork_model.predict_proba(x_test)
ll = log_loss(y_test, test_predictions_proba)
print("Log Loss: {:.4}".format(ll))
print("="*30)
precision recall f1-score support
0 0.24 0.32 0.28 460
1 0.85 0.79 0.82 2223
accuracy 0.71 2683
macro avg 0.55 0.56 0.55 2683
weighted avg 0.75 0.71 0.73 2683
Confusion Matrix
[[ 148 312]
[ 457 1766]]
accuracy is 0.7133805441669773
Log Loss: 0.538
==============================
# Neural Network using Grid Search
params = {'solver' : ['lbfgs', 'adam'],
'learning_rate_init': [0.0001],
'max_iter': [1000],
'hidden_layer_sizes': [(50, 40, 30, 20, 10)],
'activation': ['logistic'],
'alpha': [0.0001, 0.001, 0.005],
'early_stopping': [True, False]}
NeuralNetwork_grid = GridSearchCV(MLPClassifier(), param_grid=params, n_jobs=-1, cv=10, verbose=5)
NeuralNetwork_grid.fit(x,y)
print('Train Accuracy : %.3f'%NeuralNetwork_grid.best_estimator_.score(x, y))
print('Test Accuracy : %.3f'%NeuralNetwork_grid.best_estimator_.score(x_test, y_test))
GS_NNAcc = NeuralNetwork_grid.best_score_
print('Best Accuracy Through Grid Search : %.3f'%GS_NNAcc)
print('Best Parameters : ',NeuralNetwork_grid.best_params_)
Fitting 10 folds for each of 12 candidates, totalling 120 fits
/Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn( /Users/jeremysloan/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but MLPClassifier was fitted with feature names warnings.warn(
Train Accuracy : 0.538
Test Accuracy : 0.425
Best Accuracy Through Grid Search : 0.505
Best Parameters : {'activation': 'logistic', 'alpha': 0.005, 'early_stopping': True, 'hidden_layer_sizes': (50, 40, 30, 20, 10), 'learning_rate_init': 0.0001, 'max_iter': 1000, 'solver': 'lbfgs'}
compare = pd.DataFrame({'Model': ['Decision Tree', 'Random Forest', 'Naive Bayes', 'SVM', 'Neural Network'],
'Accuracy': [DTAcc*100, RFAcc*100, NBAcc*100, SVMAcc*100, NNAcc*100]})
# round to two decimal places and sort
compare = compare.sort_values(by='Accuracy', ascending=False)
compare = compare.round(decimals = 2)
compare
| Model | Accuracy | |
|---|---|---|
| 1 | Random Forest | 92.28 |
| 0 | Decision Tree | 92.17 |
| 4 | Neural Network | 71.34 |
| 2 | Naive Bayes | 56.95 |
| 3 | SVM | 51.58 |
#pip install Kaleido
# Use Plotly and Kaleido to save table as png
import plotly.figure_factory as ff
import kaleido
fig = ff.create_table(compare)
fig.update_layout(
autosize=False,
width=300,
height=150,
)
fig.write_image("SMOTEModelTable.png", scale=1.5)
sns.set_theme(style="darkgrid")
sns.barplot(data=compare.sort_values(by='Accuracy', ascending=False), x='Model', y='Accuracy', palette="viridis_r")
plt.ylabel('Accuracy Percentage', fontsize=18, weight='bold')
plt.xlabel('Model', fontsize=18, weight='bold')
plt.title('SMOTE Model Accuracy', fontsize=26, weight='bold')
plt.ylim(1,100)
fig=plt.gcf()
fig.set_size_inches(20,10)
plt.show()
GS_compare = pd.DataFrame({'Model': ['Decision Tree', 'Random Forest', 'Naive Bayes', 'SVM', 'Neural Network'],
'Accuracy': [GS_DTAcc*100, GS_RFAcc*100, GS_NBAcc*100, GS_SVMAcc*100, GS_NNAcc*100]})
# round to two decimal places in python pandas
#GS_compare.options.display.float_format = '{:.2f}'.format
GS_compare = GS_compare.sort_values(by='Accuracy', ascending=False)
GS_compare = GS_compare.round(decimals = 2)
GS_compare
| Model | Accuracy | |
|---|---|---|
| 1 | Random Forest | 80.91 |
| 0 | Decision Tree | 78.05 |
| 3 | SVM | 69.35 |
| 2 | Naive Bayes | 64.54 |
| 4 | Neural Network | 50.45 |
# Use Plotly and Kaleido to save table as png
fig = ff.create_table(GS_compare)
fig.update_layout(
autosize=False,
width=300,
height=150,
)
fig.write_image("SMOTEGSModelTable.png", scale=1.5)
sns.set_theme(style="darkgrid")
sns.barplot(data=GS_compare.sort_values(by='Accuracy', ascending=False), x='Model', y='Accuracy', palette="viridis_r")
plt.ylabel('Accuracy Percentage', fontsize=18, weight='bold')
plt.xlabel('Model', fontsize=18, weight='bold')
plt.title('SMOTE Grid Search Model Accuracy', fontsize=26, weight='bold')
plt.ylim(1,100)
fig=plt.gcf()
fig.set_size_inches(20,10)
plt.show()